Pipeline:
Loading Data and Packages
Quality control of Raw Counts
Filtering and Normalization
Quality control of data, post filering and normalization
Set up design matrix and contrasts
Estimate Dispersion and Fit the Model
Perform Differential Expression Tests
Visualization plots
1) Loading Data and Packages
# Set working directory to project folder
setwd("~/Documents/Project/Salivary_glands_project")
# Load required libraries
library(edgeR) # Differential expression analysis
library(ggplot2) # Data visualization
library(RColorBrewer) # Color palettes
library(reshape2) # Data reshaping
library(pheatmap) # Heatmap visualization
library(ggrepel) # Enhanced text labels in ggplot2
library(kableExtra) # Enhanced tables
library(clusterProfiler) # Functional profiling of gene clusters
library(org.Mm.eg.db) # Mouse genome annotation
library(ReactomePA) # Reactome pathway analysis
library(annotables) # Gene annotations
library(tidyverse) # Data manipulation and visualization
library(AnnotationDbi) # Annotation interface
library(EnhancedVolcano) # Enhanced volcano plots
library(enrichplot) # Enrichment visualization
library(ggforce) # For bubble shapes around clusters
library(msigdbr) # For MSigDB gene sets
library(ggridges) # Ridge Plots
library(KEGGREST) #Allows access to the KEGG REST API
# Set working directory
setwd("~/Documents/Project/Salivary_glands_project") # Update this path as necessary
# Imported raw count data from featurecounts tool
Data<- read.csv("readCounts2.tsv",
header = TRUE,
sep = '\t',
row.names = 'Geneid',
skip=1)
# View(Data)
# Dropped extra columns from featureCount output
drops <- c("Chr","Start","End","Strand","Length")
# head(drops)
Data <- Data[ , !(names(Data) %in% drops)]
# colnames(Data)
# Converted count data to matrix and ensure integer mode
countData <- as.matrix(Data)
mode(countData) <- "integer"
# View(countData)
# Loaded colData as metadata
metadata <- read.csv("info.csv", sep=",", row.names=1)
# Displayed first 50 rows of metadata
kable(head(metadata, 50), digits=5) %>% kable_styling("striped", full_width = FALSE, position = "center") %>% scroll_box(width = "55%", height = "500px")
| Strain | Glands | Sex | Sample.1 | |
|---|---|---|---|---|
| Par_1A_Mal | CD1 | Par | Male | Par_1A_Mal |
| Par_1B_Mal | C57 | Par | Male | Par_1B_Mal |
| Par_2A_Mal | CD1 | Par | Male | Par_2A_Mal |
| Par_2B_Mal | C57 | Par | Male | Par_2B_Mal |
| Par_3A_Mal | CD1 | Par | Male | Par_3A_Mal |
| Par_3B_Mal | C57 | Par | Male | Par_3B_Mal |
| Par_4A_Fem | CD1 | Par | Female | Par_4A_Fem |
| Par_4B_Fem | C57 | Par | Female | Par_4B_Fem |
| Par_5A_Fem | CD1 | Par | Female | Par_5A_Fem |
| Par_5B_Fem | C57 | Par | Female | Par_5B_Fem |
| Par_6A_Fem | CD1 | Par | Female | Par_6A_Fem |
| Par_6B_Fem | C57 | Par | Female | Par_6B_Fem |
| SL_1A_Mal | CD1 | SL | Male | SL_1A_Mal |
| SL_1B_Mal | C57 | SL | Male | SL_1B_Mal |
| SL_2A_Mal | CD1 | SL | Male | SL_2A_Mal |
| SL_2B_Mal | C57 | SL | Male | SL_2B_Mal |
| SL_3A_Mal | CD1 | SL | Male | SL_3A_Mal |
| SL_3B_Mal | C57 | SL | Male | SL_3B_Mal |
| SL_4A_Fem | CD1 | SL | Female | SL_4A_Fem |
| SL_4B_Fem | C57 | SL | Female | SL_4B_Fem |
| SL_5A_Fem | CD1 | SL | Female | SL_5A_Fem |
| SL_5B_Fem | C57 | SL | Female | SL_5B_Fem |
| SL_6A_Fem | CD1 | SL | Female | SL_6A_Fem |
| SL_6B_Fem | C57 | SL | Female | SL_6B_Fem |
| SM_1A_Mal | CD1 | SM | Male | SM_1A_Mal |
| SM_1B_Mal | C57 | SM | Male | SM_1B_Mal |
| SM_2A_Mal | CD1 | SM | Male | SM_2A_Mal |
| SM_2B_Mal | C57 | SM | Male | SM_2B_Mal |
| SM_3A_Mal | CD1 | SM | Male | SM_3A_Mal |
| SM_3B_Mal | C57 | SM | Male | SM_3B_Mal |
| SM_4A_Fem | CD1 | SM | Female | SM_4A_Fem |
| SM_4B_Fem | C57 | SM | Female | SM_4B_Fem |
| SM_5A_Fem | CD1 | SM | Female | SM_5A_Fem |
| SM_5B_Fem | C57 | SM | Female | SM_5B_Fem |
| SM_6A_Fem | CD1 | SM | Female | SM_6A_Fem |
| SM_6B_Fem | C57 | SM | Female | SM_6B_Fem |
# Checked all sample IDs in colData are also in CountData
all(rownames(metadata) %in% colnames(countData))
## [1] TRUE
# Checking and matching the order of countData and metadata
countData <- countData[, rownames(metadata)]
all(rownames(metadata) == colnames(countData))
## [1] TRUE
2) Quality Control: Raw Counts
# Created a DGEList object with raw counts for downstream analysis and quality control
raw_counts <- DGEList(counts = countData)
# Calculated and summarized library sizes
library_sizes <- colSums(raw_counts$counts)
summary(library_sizes)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 30179848 36029489 39188736 40403424 42433542 65308671
# Created a barplot to vizualize library sizes
ggplot(data = data.frame(Sample = names(library_sizes), Counts = library_sizes),
aes(x = Sample, y = Counts)) +
geom_bar(stat = "identity", fill = "skyblue") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Library Sizes", x = "Sample", y = "Total Counts")
# Calculated log2 CPM values from raw counts
log_cpm <- cpm(raw_counts, log = TRUE, prior.count = 1)
# Prepared data for boxplot viz.
log_cpm_long <- melt(as.data.frame(log_cpm), id.vars = NULL, variable.name = "Sample", value.name = "Log2_CPM")
log_cpm_long$Strain <- metadata$Strain[match(log_cpm_long$Sample, rownames(metadata))]
# Plotted boxplots of gene expression distributions
ggplot(log_cpm_long, aes(x = Sample, y = Log2_CPM, fill = Strain)) +
geom_boxplot(outlier.shape = NA, alpha = 0.7) +
scale_fill_brewer(palette = "Set1") +
labs(title = "Gene Expression Distribution (Raw Counts)", x = "Sample", y = "Log2(CPM + 1)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5, size = 14)) +
guides(fill = guide_legend(title = "Strain"))
# Calculated Pearson correlation matrix
cor_matrix <- cor(log_cpm, method = "pearson")
# Defined a color palette for heatmap
color_palette <- colorRampPalette(brewer.pal(9, "RdYlBu"))(50)
# Prepared annotation for heatmap
annotation_col <- data.frame(
Strain = metadata$Strain,
Glands = metadata$Glands,
Sex = metadata$Sex
)
rownames(annotation_col) <- rownames(metadata)
# Generated correlation heatmap with annotations
pheatmap(cor_matrix,
main = "Pearson Correlation Heatmap (Raw Counts)",
color = color_palette,
clustering_distance_rows = "correlation",
clustering_distance_cols = "correlation",
border_color = NA,
display_numbers = FALSE,
fontsize_row = 8,
fontsize_col = 9,
annotation_col = annotation_col,
fontsize = 10)
4. Euclidean Distance
# Calculated Euclidean distance matrix
dist_matrix <- dist(t(log_cpm), method = "euclidean")
dist_matrix_full <- as.matrix(dist_matrix)
# Plotted heatmap with annotations, used the same color palette as the heatmap for Pearson correlation
pheatmap(dist_matrix_full,
main = "Euclidean Distance Heatmap (Raw Counts)",
color = color_palette,
border_color = NA,
display_numbers = FALSE,
fontsize_row = 8,
fontsize_col = 9,
annotation_col = annotation_col,
fontsize = 10)
# Defined color palette for Glands and Sex
gland_colors <- c("Par" = "cyan2", "SL" = "seagreen1", "SM" = "lightcoral") # Assign colors to each gland type
sex_colors <- c("Male" = "purple", "Female" = "red") # Colors for Sex
# Performed PCA
pca <- prcomp(t(log_cpm), scale. = TRUE)
# Extracted PCA data and calculated variance explained
pca_data <- as.data.frame(pca$x)
percentVar <- round(100 * (pca$sdev^2 / sum(pca$sdev^2)), 1)
# Added sample metadata to PCA results
pca_data$SampleID <- rownames(pca_data)
pca_data$Strain <- metadata$Strain[match(pca_data$SampleID, rownames(metadata))]
pca_data$Glands <- metadata$Glands[match(pca_data$SampleID, rownames(metadata))]
pca_data$Sex <- metadata$Sex[match(pca_data$SampleID, rownames(metadata))]
# Plot PCA
ggplot(pca_data, aes(x = PC1, y = PC2, color = Glands, shape = Glands)) +
geom_point(size = 3) +
# geom_text_repel(size = 3, fontface = "bold", max.overlaps = 10, box.padding = 0.3, point.padding = 0.5) +
scale_color_manual(values = gland_colors) +
scale_shape_manual(values = c("Par" = 16, "SL" = 17, "SM" = 15)) + # Different shapes for each gland type
xlab(paste0("PC1: ", percentVar[1], "% variance")) +
ylab(paste0("PC2: ", percentVar[2], "% variance")) +
labs(title = "PCA Plot of Gene Expression Data (Raw Counts)") +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
strip.text = element_text(size = 12, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
legend.position = "right"
) +
guides(color = guide_legend(title = "Glands"),
shape = guide_legend(title = "Glands")) +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
geom_mark_ellipse(aes(fill = Sex), alpha = 0.1, color = NA) + # Ellipses filled based on Sex
facet_wrap(~ Strain) + # Facet by Strain
scale_fill_manual(values = sex_colors) + # Custom fill colors for ellipses based on Sex
theme(panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_blank(),
panel.border = element_blank())
3) Filtering and Normalization
# Created group factor based on gland types
group <- factor(metadata$Glands)
# Created DGEList object with group information
y <- DGEList(counts = countData, group = group)
# Filtered lowly expressed genes
keep <- filterByExpr(y)
y <- y[keep, , keep.lib.sizes = FALSE]
# Normalized the data using TMM method
y <- calcNormFactors(y)
# Displayed the first 50 rows of the normalized sample information
kable(head(y$samples, 50), digits=5) %>% kable_styling("striped", full_width = FALSE, position = "center") %>% scroll_box(width = "50%", height = "500px")
| group | lib.size | norm.factors | |
|---|---|---|---|
| Par_1A_Mal | Par | 49346283 | 1.04225 |
| Par_1B_Mal | Par | 41958714 | 0.95690 |
| Par_2A_Mal | Par | 39588858 | 0.71979 |
| Par_2B_Mal | Par | 38521344 | 0.64772 |
| Par_3A_Mal | Par | 65280666 | 0.77639 |
| Par_3B_Mal | Par | 38812928 | 0.83174 |
| Par_4A_Fem | Par | 50136011 | 0.56364 |
| Par_4B_Fem | Par | 40097846 | 0.56423 |
| Par_5A_Fem | Par | 39029949 | 1.38403 |
| Par_5B_Fem | Par | 36458387 | 1.01408 |
| Par_6A_Fem | Par | 42659603 | 0.92477 |
| Par_6B_Fem | Par | 39232788 | 0.52092 |
| SL_1A_Mal | SL | 48441001 | 1.46385 |
| SL_1B_Mal | SL | 40594504 | 1.23048 |
| SL_2A_Mal | SL | 43189949 | 1.59504 |
| SL_2B_Mal | SL | 44423292 | 1.24701 |
| SL_3A_Mal | SL | 33139664 | 1.51100 |
| SL_3B_Mal | SL | 38365403 | 1.31636 |
| SL_4A_Fem | SL | 46153401 | 1.48888 |
| SL_4B_Fem | SL | 38555200 | 1.40968 |
| SL_5A_Fem | SL | 32477171 | 1.46581 |
| SL_5B_Fem | SL | 42336706 | 1.52588 |
| SL_6A_Fem | SL | 41094426 | 1.45802 |
| SL_6B_Fem | SL | 33817200 | 1.32851 |
| SM_1A_Mal | SM | 30173355 | 0.62333 |
| SM_1B_Mal | SM | 33349955 | 0.95360 |
| SM_2A_Mal | SM | 41966995 | 0.86098 |
| SM_2B_Mal | SM | 39124401 | 0.94713 |
| SM_3A_Mal | SM | 34166773 | 1.00972 |
| SM_3B_Mal | SM | 36254282 | 1.07864 |
| SM_4A_Fem | SM | 32913394 | 0.89073 |
| SM_4B_Fem | SM | 54266434 | 0.72644 |
| SM_5A_Fem | SM | 36841471 | 1.03019 |
| SM_5B_Fem | SM | 35307553 | 0.94457 |
| SM_6A_Fem | SM | 34370981 | 0.82847 |
| SM_6B_Fem | SM | 41257935 | 0.86801 |
# Calculated log2 CPM values after normalization
log_cpm_norm <- cpm(y, log=TRUE, prior.count=1)
4) Quality Control: Post normalization
# Prepared the data for visualization
log_cpm_norm_long <- melt(as.data.frame(log_cpm_norm), id.vars = NULL, variable.name = "Sample", value.name = "Log2_CPM")
log_cpm_norm_long$Strain <- metadata$Strain[match(log_cpm_norm_long$Sample, rownames(metadata))]
# Plotted boxplots of gene expression distributions
ggplot(log_cpm_norm_long, aes(x = Sample, y = Log2_CPM, fill = Strain)) +
geom_boxplot(outlier.shape = NA, alpha = 0.7) +
scale_fill_brewer(palette = "Set1") +
labs(title = "Gene Expression Distribution (Raw Counts)", x = "Sample", y = "Log2(CPM + 1)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5, size = 14)) +
guides(fill = guide_legend(title = "Strain"))
# Calculated Pearson correlation matrix for normalized data
cor_matrix_norm <- cor(log_cpm_norm, method = "pearson")
# Generated heatmap to vizualize sample correlation using pearson method
pheatmap(cor_matrix_norm,
main = "Pearson Correlation Heatmap (Normalized Counts)",
color = color_palette,
clustering_distance_rows = "correlation",
clustering_distance_cols = "correlation",
border_color = NA,
display_numbers = FALSE,
fontsize_row = 8,
fontsize_col = 9,
annotation_col = annotation_col, # Strain, Glands, Sex
fontsize = 10)
# Calculated Euclidean distance matrix
dist_matrix_norm <- dist(t(log_cpm_norm), method = "euclidean")
dist_matrix_full_norm <- as.matrix(dist_matrix_norm)
# Plot heatmap with annotations, used same color palette as pearson heatmap
pheatmap(dist_matrix_full_norm,
main = "Euclidean Distance Heatmap (Normalized Counts)",
color = color_palette,
border_color = NA,
display_numbers = FALSE,
fontsize_row = 8,
fontsize_col = 9,
annotation_col = annotation_col, # Strain, Glands, Sex
fontsize = 10)
# Define color palette for Glands and Sex
strain_colors <- c("Par" = "cyan2", "SL" = "seagreen1", "SM" = "lightcoral") # Assign colors to each gland type
sex_colors <- c("Male" = "purple", "Female" = "red") # Colors for Sex
# Performed PCA
pca_norm <- prcomp(t(log_cpm_norm), scale. = TRUE)
# Extracted PCA data and calculate variance explained
pca_data_norm <- as.data.frame(pca_norm$x)
percentVar_norm <- round(100 * (pca_norm$sdev^2 / sum(pca_norm$sdev^2)), 1)
# Added sample metadata to PCA results
pca_data_norm$SampleID <- rownames(pca_data_norm)
pca_data_norm$Strain <- metadata$Strain[match(pca_data_norm$SampleID, metadata$Sample)]
pca_data_norm$Glands <- metadata$Glands[match(pca_data_norm$SampleID, metadata$Sample)]
pca_data_norm$Sex <- metadata$Sex[match(pca_data_norm$SampleID, metadata$Sample)]
# Generated PCA plot and vizualized it using ggplot2
ggplot(pca_data_norm, aes(x = PC1, y = PC2, color = Glands, shape = Glands)) +
geom_point(size = 3) +
# geom_text_repel(size = 3, fontface = "bold", max.overlaps = 10, box.padding = 0.3, point.padding = 0.5) +
scale_color_manual(values = strain_colors) +
xlab(paste0("PC1: ", percentVar_norm[1], "% variance")) +
ylab(paste0("PC2: ", percentVar_norm[2], "% variance")) +
labs(title = "PCA Plot of Gene Expression Data (Normalized Counts)") +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
strip.text = element_text(size = 12, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
legend.position = "right"
) +
guides(color = guide_legend(title = "Glands"),
shape = guide_legend(title = "Glands")) +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
geom_mark_ellipse(aes(fill = Sex), alpha = 0.1, color = NA) +
facet_wrap(~ Strain) + # Facet by Strain
scale_fill_manual(values = sex_colors) +
theme(panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_blank(),
panel.border = element_blank())
4) Set Up the Design Matrix and Contrasts
# Converted the Glands column in metadata to a factor
metadata$Glands <- factor(metadata$Glands)
# Viewed the levels of Glands
levels(metadata$Glands)
## [1] "Par" "SL" "SM"
# Set up the design matrix without an intercept (for pairwise comparisons)
design <- model.matrix(~ 0 + Glands, data = metadata)
colnames(design) <- levels(metadata$Glands) # Name columns as Glands
# Displayed the first 60 rows of the design matrix
kable(head(design, 55), digits=5) %>% kable_styling("striped", full_width = FALSE) %>% scroll_box(width = "32%", height = "500px")
| Par | SL | SM | |
|---|---|---|---|
| Par_1A_Mal | 1 | 0 | 0 |
| Par_1B_Mal | 1 | 0 | 0 |
| Par_2A_Mal | 1 | 0 | 0 |
| Par_2B_Mal | 1 | 0 | 0 |
| Par_3A_Mal | 1 | 0 | 0 |
| Par_3B_Mal | 1 | 0 | 0 |
| Par_4A_Fem | 1 | 0 | 0 |
| Par_4B_Fem | 1 | 0 | 0 |
| Par_5A_Fem | 1 | 0 | 0 |
| Par_5B_Fem | 1 | 0 | 0 |
| Par_6A_Fem | 1 | 0 | 0 |
| Par_6B_Fem | 1 | 0 | 0 |
| SL_1A_Mal | 0 | 1 | 0 |
| SL_1B_Mal | 0 | 1 | 0 |
| SL_2A_Mal | 0 | 1 | 0 |
| SL_2B_Mal | 0 | 1 | 0 |
| SL_3A_Mal | 0 | 1 | 0 |
| SL_3B_Mal | 0 | 1 | 0 |
| SL_4A_Fem | 0 | 1 | 0 |
| SL_4B_Fem | 0 | 1 | 0 |
| SL_5A_Fem | 0 | 1 | 0 |
| SL_5B_Fem | 0 | 1 | 0 |
| SL_6A_Fem | 0 | 1 | 0 |
| SL_6B_Fem | 0 | 1 | 0 |
| SM_1A_Mal | 0 | 0 | 1 |
| SM_1B_Mal | 0 | 0 | 1 |
| SM_2A_Mal | 0 | 0 | 1 |
| SM_2B_Mal | 0 | 0 | 1 |
| SM_3A_Mal | 0 | 0 | 1 |
| SM_3B_Mal | 0 | 0 | 1 |
| SM_4A_Fem | 0 | 0 | 1 |
| SM_4B_Fem | 0 | 0 | 1 |
| SM_5A_Fem | 0 | 0 | 1 |
| SM_5B_Fem | 0 | 0 | 1 |
| SM_6A_Fem | 0 | 0 | 1 |
| SM_6B_Fem | 0 | 0 | 1 |
5) Estimate Dispersion and Fit the Model
# Assigned the design matrix to the DGEList object and estimated dispersion
y <- estimateDisp(y, design)
# Plotted BCV (Biological Coefficient of Variation) to assess dispersion estimates
plotBCV(y)
# Fitted the generalized linear model using the quasi-likelihood method
fit_qlf <- glmQLFit(y, design)
Define Contrasts for Pairwise Comparisons
# Defined the contrasts
contrast_matrix <- makeContrasts(
Par_vs_SL = Par - SL,
Par_vs_SM = Par - SM,
SL_vs_SM = SL - SM,
levels = design
)
# View the contrast matrix
head(contrast_matrix)
## Contrasts
## Levels Par_vs_SL Par_vs_SM SL_vs_SM
## Par 1 1 0
## SL -1 0 1
## SM 0 -1 -1
7) Performing Differential Expression Tests
Using the contrasts to perform the quasi-likelihood F-tests for each comparison. Function “glmQLFTest” performs quasi-likelihood F-test for each contrast
# quasi-likelihood F-test were performed for each contrast individually
# Contrast 1: Par vs SL
qlf_Par_vs_SL <- glmQLFTest(fit_qlf, contrast = contrast_matrix[,"Par_vs_SL"])
# Contrast 2: Par vs SM
qlf_Par_vs_SM <- glmQLFTest(fit_qlf, contrast = contrast_matrix[,"Par_vs_SM"])
# Contrast 3: SL vs SM
qlf_SL_vs_SM <- glmQLFTest(fit_qlf, contrast = contrast_matrix[,"SL_vs_SM"])
Identify Upregulated and Downregulated Genes for Each Contrast
topTags function in edgeR extracts and organizes differentially expressed genes from statistical test results.
# Number of genes to display/save was set (n=Inf to include all genes)
n_genes <- Inf
# Par vs SL
topTags_Par_vs_SL <- topTags(qlf_Par_vs_SL, n = n_genes) #A variable topTags_Par_vs_SL is created where the extracted top genes are stored.
results_Par_vs_SL <- topTags_Par_vs_SL$table #results_Par_vs_SL variable holds table from topTags object
results_Par_vs_SL$Gene_names <- rownames(results_Par_vs_SL) #Rownames that contains gene names now have their own column called gene names. This column will help during vizualization.
# Par vs SM
topTags_Par_vs_SM <- topTags(qlf_Par_vs_SM, n = n_genes)
results_Par_vs_SM <- topTags_Par_vs_SM$table
results_Par_vs_SM$Gene_names <- rownames(results_Par_vs_SM)
# SL vs SM
topTags_SL_vs_SM <- topTags(qlf_SL_vs_SM, n = n_genes)
results_SL_vs_SM <- topTags_SL_vs_SM$table
results_SL_vs_SM$Gene_names <- rownames(results_SL_vs_SM)
FDR of 5% is examined with the function “decideTests”
# Par vs SL
kable(head(results_Par_vs_SL, 20), digits=50, caption = "Top 20 DE Genes: Par vs SL") %>%
kable_styling("striped", full_width = FALSE, position = "center") %>% scroll_box(width = "80%", height = "500px")
| logFC | logCPM | F | PValue | FDR | Gene_names | |
|---|---|---|---|---|---|---|
| Tmco3 | -4.155307 | 7.0623389 | 919.5926 | 2.216414e-27 | 2.202153e-23 | Tmco3 |
| Cldn24 | 7.144283 | 1.4461368 | 384.7686 | 4.570061e-27 | 2.202153e-23 | Cldn24 |
| Nkx2-3 | -6.701885 | 5.4486211 | 885.1794 | 5.173102e-27 | 2.202153e-23 | Nkx2-3 |
| Sox2 | -6.312474 | 4.9650560 | 879.0124 | 5.741875e-27 | 2.202153e-23 | Sox2 |
| Galnt4 | -3.587133 | 6.5218741 | 798.5773 | 2.594106e-26 | 7.959235e-23 | Galnt4 |
| Casc4 | -4.808568 | 5.8041599 | 777.0299 | 4.198472e-26 | 1.073479e-22 | Casc4 |
| Isl2 | -6.851417 | 3.1653075 | 711.9135 | 1.519104e-25 | 3.329225e-22 | Isl2 |
| Hpse2 | -6.216574 | 2.0492147 | 572.3787 | 1.949171e-25 | 3.651888e-22 | Hpse2 |
| Liph | -5.637708 | 5.9470991 | 707.4812 | 2.142428e-25 | 3.651888e-22 | Liph |
| Bace2 | -3.638722 | 5.9502284 | 684.0296 | 3.793755e-25 | 5.337132e-22 | Bace2 |
| Syt7 | -5.785481 | 6.2686885 | 679.6713 | 4.311307e-25 | 5.337132e-22 | Syt7 |
| St3gal6 | 6.021131 | 6.0325314 | 678.8974 | 4.346841e-25 | 5.337132e-22 | St3gal6 |
| Tgoln1 | -3.065223 | 7.5018066 | 677.0345 | 4.522698e-25 | 5.337132e-22 | Tgoln1 |
| Gcc2 | -2.140715 | 5.5046809 | 663.5392 | 6.400906e-25 | 7.014022e-22 | Gcc2 |
| Vwf | -3.636105 | 6.3349891 | 655.6737 | 7.867471e-25 | 8.046324e-22 | Vwf |
| Ptprt | -5.990776 | 1.8541562 | 550.9769 | 1.126969e-24 | 1.080552e-21 | Ptprt |
| Heatr5a | -2.458494 | 5.7820960 | 619.3447 | 2.094741e-24 | 1.890319e-21 | Heatr5a |
| Slc44a4 | -5.322553 | 6.4317854 | 614.5393 | 2.400135e-24 | 2.045582e-21 | Slc44a4 |
| A530006G24Rik | 7.173198 | 0.5372368 | 231.9692 | 7.903342e-24 | 6.381325e-21 | A530006G24Rik |
| Bcan | -6.412518 | 2.8853652 | 564.1640 | 8.788216e-24 | 6.741001e-21 | Bcan |
# This summary was based on significance determined by FDR
summary(decideTests(results_Par_vs_SL$FDR))
## [,1]
## NotSig 6853
## Sig 8488
# Par vs SM
kable(head(results_Par_vs_SM, 20), digits=50, caption = "Top 20 DE Genes: Par vs SM") %>%
kable_styling("striped", full_width = FALSE, position = "center") %>% scroll_box(width = "80%", height = "500px")
| logFC | logCPM | F | PValue | FDR | Gene_names | |
|---|---|---|---|---|---|---|
| Cldn24 | 7.387387 | 1.4461368 | 349.1460 | 5.082161e-26 | 7.796543e-22 | Cldn24 |
| St3gal6 | 5.411942 | 6.0325314 | 579.1408 | 6.664753e-24 | 5.112199e-20 | St3gal6 |
| Galnt6 | 4.461964 | 5.4128654 | 525.4032 | 3.517193e-23 | 1.798575e-19 | Galnt6 |
| Tlx1 | -5.899304 | 4.9885551 | 520.1044 | 4.720355e-23 | 1.810374e-19 | Tlx1 |
| Ggh | 5.387791 | 10.5375660 | 450.5100 | 4.700715e-22 | 1.442273e-18 | Ggh |
| Cldn22 | 6.680395 | 6.5127189 | 441.7255 | 6.678743e-22 | 1.707643e-18 | Cldn22 |
| C330024C12Rik | 6.659246 | 0.6463565 | 207.3046 | 8.483972e-22 | 1.859323e-18 | C330024C12Rik |
| A530006G24Rik | 6.702542 | 0.5372368 | 189.9868 | 1.479490e-21 | 2.837107e-18 | A530006G24Rik |
| Csn3 | 6.647739 | 3.5915714 | 410.6277 | 3.374339e-21 | 5.751748e-18 | Csn3 |
| Prb1 | 7.087445 | 6.4659955 | 363.1883 | 1.779018e-20 | 2.729191e-17 | Prb1 |
| Liph | -3.668593 | 5.9470991 | 356.7811 | 2.320996e-20 | 3.236945e-17 | Liph |
| Shisa2 | 4.739191 | 4.4588110 | 354.7569 | 2.570511e-20 | 3.286184e-17 | Shisa2 |
| B4galnt3 | 5.972241 | 4.2350210 | 351.8476 | 3.229659e-20 | 3.811246e-17 | B4galnt3 |
| Klhl14 | 5.578961 | -0.4543305 | 160.5617 | 4.888843e-20 | 5.357124e-17 | Klhl14 |
| Bpifb4 | 6.108430 | 0.5093333 | 220.6385 | 1.039515e-19 | 1.052623e-16 | Bpifb4 |
| Gm8882 | 6.732739 | 2.6042303 | 308.8407 | 1.097840e-19 | 1.052623e-16 | Gm8882 |
| Gm9994 | 6.947403 | 2.1213282 | 234.1878 | 2.274457e-19 | 2.052497e-16 | Gm9994 |
| 1700066N21Rik | 6.913182 | 9.6857686 | 308.2725 | 2.529098e-19 | 2.155494e-16 | 1700066N21Rik |
| 4930426D05Rik | 5.755185 | 0.2724439 | 159.0632 | 3.417759e-19 | 2.759571e-16 | 4930426D05Rik |
| Cst10 | 7.015809 | 11.7977784 | 300.9220 | 3.744712e-19 | 2.872381e-16 | Cst10 |
# This summary was based on significance determined by FDR
summary(decideTests(results_Par_vs_SM$FDR))
## [,1]
## NotSig 8851
## Sig 6490
# SL vs SM
kable(head(results_SL_vs_SM, 20), digits=50, caption = "Top 20 DE Genes: SL vs SM") %>%
kable_styling("striped", full_width = FALSE, position = "center") %>% scroll_box(width = "80%", height = "500px")
| logFC | logCPM | F | PValue | FDR | Gene_names | |
|---|---|---|---|---|---|---|
| Galnt4 | 4.157584 | 6.521874 | 1015.6554 | 3.893774e-28 | 5.973438e-24 | Galnt4 |
| Nkx2-3 | 6.850034 | 5.448621 | 910.6768 | 3.157096e-27 | 2.421650e-23 | Nkx2-3 |
| Casc4 | 5.082725 | 5.804160 | 843.9180 | 9.989003e-27 | 4.991333e-23 | Casc4 |
| Sox2 | 6.084940 | 4.965056 | 838.5557 | 1.301436e-26 | 4.991333e-23 | Sox2 |
| Vwf | 4.118477 | 6.334989 | 803.0749 | 2.353816e-26 | 7.221979e-23 | Vwf |
| Bace2 | 3.945412 | 5.950228 | 780.6508 | 3.852107e-26 | 9.849197e-23 | Bace2 |
| Syt7 | 6.328771 | 6.268689 | 770.2607 | 4.951868e-26 | 1.085237e-22 | Syt7 |
| Hpse2 | 6.418600 | 2.049215 | 588.9173 | 1.138432e-25 | 2.057243e-22 | Hpse2 |
| Galnt12 | 5.242333 | 6.494604 | 730.9068 | 1.206909e-25 | 2.057243e-22 | Galnt12 |
| Tmco3 | 3.554465 | 7.062339 | 712.4616 | 1.874623e-25 | 2.875859e-22 | Tmco3 |
| Isl2 | 6.382053 | 3.165308 | 659.0448 | 5.797170e-25 | 8.084945e-22 | Isl2 |
| Fgl2 | 4.696106 | 6.467270 | 652.5791 | 8.537817e-25 | 1.026436e-21 | Fgl2 |
| Dnajc10 | 4.780718 | 8.340781 | 651.8237 | 8.698039e-25 | 1.026436e-21 | Dnajc10 |
| Tgoln1 | 2.822574 | 7.501807 | 586.2337 | 5.372613e-24 | 5.887233e-21 | Tgoln1 |
| Bcan | 6.328490 | 2.885365 | 556.0463 | 1.126810e-23 | 1.152426e-20 | Bcan |
| Foxp2 | 5.290392 | 2.163991 | 559.4940 | 1.582571e-23 | 1.517389e-20 | Foxp2 |
| Tmc5 | 6.253124 | 6.156531 | 531.2174 | 2.980537e-23 | 2.689672e-20 | Tmc5 |
| B3gnt6 | 6.822907 | 7.325964 | 510.0197 | 5.822527e-23 | 4.962411e-20 | B3gnt6 |
| Kcne1 | 5.814247 | 6.287985 | 488.3988 | 1.222004e-22 | 9.866717e-20 | Kcne1 |
| Galnt6 | 4.243983 | 5.412865 | 484.9055 | 1.370746e-22 | 1.051430e-19 | Galnt6 |
# This summary was based on significance determined by FDR
summary(decideTests(results_SL_vs_SM$FDR))
## [,1]
## NotSig 9987
## Sig 5354
Identifying upregulated and downregulated genes
# Upregulated genes were identified following the filtering parameters of logFC ≥ 1 and FDR < 0.05
upregulated_Par_vs_SL <- results_Par_vs_SL[
results_Par_vs_SL$FDR < 0.05 & results_Par_vs_SL$logFC >= 1, ]
# Downregulated genes were identified following the filtering parameters of logFC ≤ 1 and FDR < 0.05
downregulated_Par_vs_SL <- results_Par_vs_SL[
results_Par_vs_SL$FDR < 0.05 & results_Par_vs_SL$logFC <= -1, ]
# Top 20 upregulated genes were displayed
kable(head(upregulated_Par_vs_SL, 20), digits = 50, caption = "Top 20 Upregulated Genes: Par vs SL") %>%
kable_styling("striped", full_width = FALSE, position = "center") %>% scroll_box(width = "80%", height = "500px")
| logFC | logCPM | F | PValue | FDR | Gene_names | |
|---|---|---|---|---|---|---|
| Cldn24 | 7.144283 | 1.4461368 | 384.7686 | 4.570061e-27 | 2.202153e-23 | Cldn24 |
| St3gal6 | 6.021131 | 6.0325314 | 678.8974 | 4.346841e-25 | 5.337132e-22 | St3gal6 |
| A530006G24Rik | 7.173198 | 0.5372368 | 231.9692 | 7.903342e-24 | 6.381325e-21 | A530006G24Rik |
| C330024C12Rik | 6.963839 | 0.6463565 | 247.1584 | 1.017724e-23 | 7.434717e-21 | C330024C12Rik |
| Foxred2 | 4.115343 | 4.0718214 | 554.9747 | 1.379563e-23 | 8.818285e-21 | Foxred2 |
| Klhl14 | 6.518920 | -0.4543305 | 215.4948 | 2.226527e-23 | 1.265080e-20 | Klhl14 |
| 4930426D05Rik | 7.631623 | 0.2724439 | 225.7695 | 6.506348e-23 | 3.441858e-20 | 4930426D05Rik |
| Ggh | 5.765634 | 10.5375660 | 497.8351 | 8.698413e-23 | 4.170074e-20 | Ggh |
| Cldn22 | 7.185898 | 6.5127189 | 490.9728 | 1.123886e-22 | 5.071039e-20 | Cldn22 |
| Csn3 | 7.458079 | 3.5915714 | 488.2706 | 1.903241e-22 | 8.342177e-20 | Csn3 |
| Prb1 | 7.698074 | 6.4659955 | 407.9935 | 2.578007e-21 | 9.197489e-19 | Prb1 |
| Gm9994 | 7.556486 | 2.1213282 | 276.8395 | 9.385634e-21 | 2.879700e-18 | Gm9994 |
| Crabp2 | 6.524720 | 3.2424634 | 377.3742 | 1.323304e-20 | 3.798276e-18 | Crabp2 |
| Man2a2 | 2.662444 | 5.2737581 | 368.2397 | 1.363867e-20 | 3.804198e-18 | Man2a2 |
| Gm8882 | 6.750530 | 2.6042303 | 331.3574 | 3.360730e-20 | 8.055774e-18 | Gm8882 |
| 3110099E03Rik | 7.270441 | 3.8313719 | 350.9954 | 4.196144e-20 | 9.903546e-18 | 3110099E03Rik |
| Susd3 | 3.282256 | 2.5715994 | 334.8271 | 6.783510e-20 | 1.553221e-17 | Susd3 |
| 1700066N21Rik | 7.160861 | 9.6857686 | 323.7933 | 1.134104e-19 | 2.485469e-17 | 1700066N21Rik |
| Slc25a21 | 3.521792 | 4.0678813 | 318.8254 | 1.466527e-19 | 3.124722e-17 | Slc25a21 |
| Svs6 | 5.563076 | -1.1507408 | 163.5616 | 2.202092e-19 | 4.387311e-17 | Svs6 |
# Top 20 downregulated genes were displayed
kable(head(downregulated_Par_vs_SL, 20), digits = 50, caption = "Top 20 Downregulated Genes: Par vs SL") %>%
kable_styling("striped", full_width = FALSE, position = "center") %>% scroll_box(width = "80%", height = "500px")
| logFC | logCPM | F | PValue | FDR | Gene_names | |
|---|---|---|---|---|---|---|
| Tmco3 | -4.155307 | 7.062339 | 919.5926 | 2.216414e-27 | 2.202153e-23 | Tmco3 |
| Nkx2-3 | -6.701885 | 5.448621 | 885.1794 | 5.173102e-27 | 2.202153e-23 | Nkx2-3 |
| Sox2 | -6.312474 | 4.965056 | 879.0124 | 5.741875e-27 | 2.202153e-23 | Sox2 |
| Galnt4 | -3.587133 | 6.521874 | 798.5773 | 2.594106e-26 | 7.959235e-23 | Galnt4 |
| Casc4 | -4.808568 | 5.804160 | 777.0299 | 4.198472e-26 | 1.073479e-22 | Casc4 |
| Isl2 | -6.851417 | 3.165308 | 711.9135 | 1.519104e-25 | 3.329225e-22 | Isl2 |
| Hpse2 | -6.216574 | 2.049215 | 572.3787 | 1.949171e-25 | 3.651888e-22 | Hpse2 |
| Liph | -5.637708 | 5.947099 | 707.4812 | 2.142428e-25 | 3.651888e-22 | Liph |
| Bace2 | -3.638722 | 5.950228 | 684.0296 | 3.793755e-25 | 5.337132e-22 | Bace2 |
| Syt7 | -5.785481 | 6.268689 | 679.6713 | 4.311307e-25 | 5.337132e-22 | Syt7 |
| Tgoln1 | -3.065223 | 7.501807 | 677.0345 | 4.522698e-25 | 5.337132e-22 | Tgoln1 |
| Gcc2 | -2.140715 | 5.504681 | 663.5392 | 6.400906e-25 | 7.014022e-22 | Gcc2 |
| Vwf | -3.636105 | 6.334989 | 655.6737 | 7.867471e-25 | 8.046324e-22 | Vwf |
| Ptprt | -5.990776 | 1.854156 | 550.9769 | 1.126969e-24 | 1.080552e-21 | Ptprt |
| Heatr5a | -2.458494 | 5.782096 | 619.3447 | 2.094741e-24 | 1.890319e-21 | Heatr5a |
| Slc44a4 | -5.322553 | 6.431785 | 614.5393 | 2.400135e-24 | 2.045582e-21 | Slc44a4 |
| Bcan | -6.412518 | 2.885365 | 564.1640 | 8.788216e-24 | 6.741001e-21 | Bcan |
| Tmc5 | -6.528392 | 6.156531 | 563.0624 | 1.105612e-23 | 7.566516e-21 | Tmc5 |
| Sec23b | -3.519258 | 8.572684 | 561.1589 | 1.134410e-23 | 7.566516e-21 | Sec23b |
| Dnajc10 | -4.294980 | 8.340781 | 551.4164 | 1.529635e-23 | 9.386453e-21 | Dnajc10 |
# Upregulated genes were identified following the filtering parameters of logFC ≥ 1 and FDR < 0.05
upregulated_Par_vs_SM <- results_Par_vs_SM[
results_Par_vs_SM$FDR < 0.05 & results_Par_vs_SM$logFC >= 1, ]
# Downregulated genes were identified following the filtering parameters of logFC ≤ 1 and FDR < 0.05
downregulated_Par_vs_SM <- results_Par_vs_SM[
results_Par_vs_SM$FDR < 0.05 & results_Par_vs_SM$logFC <= -1, ]
# Top 20 upregulated genes were displayed
kable(head(upregulated_Par_vs_SM, 20), digits = 50, caption = "Top 20 Upregulated Genes: Par vs SM") %>%
kable_styling("striped", full_width = FALSE, position = "center") %>% scroll_box(width = "80%", height = "500px")
| logFC | logCPM | F | PValue | FDR | Gene_names | |
|---|---|---|---|---|---|---|
| Cldn24 | 7.387387 | 1.4461368 | 349.1460 | 5.082161e-26 | 7.796543e-22 | Cldn24 |
| St3gal6 | 5.411942 | 6.0325314 | 579.1408 | 6.664753e-24 | 5.112199e-20 | St3gal6 |
| Galnt6 | 4.461964 | 5.4128654 | 525.4032 | 3.517193e-23 | 1.798575e-19 | Galnt6 |
| Ggh | 5.387791 | 10.5375660 | 450.5100 | 4.700715e-22 | 1.442273e-18 | Ggh |
| Cldn22 | 6.680395 | 6.5127189 | 441.7255 | 6.678743e-22 | 1.707643e-18 | Cldn22 |
| C330024C12Rik | 6.659246 | 0.6463565 | 207.3046 | 8.483972e-22 | 1.859323e-18 | C330024C12Rik |
| A530006G24Rik | 6.702542 | 0.5372368 | 189.9868 | 1.479490e-21 | 2.837107e-18 | A530006G24Rik |
| Csn3 | 6.647739 | 3.5915714 | 410.6277 | 3.374339e-21 | 5.751748e-18 | Csn3 |
| Prb1 | 7.087445 | 6.4659955 | 363.1883 | 1.779018e-20 | 2.729191e-17 | Prb1 |
| Shisa2 | 4.739191 | 4.4588110 | 354.7569 | 2.570511e-20 | 3.286184e-17 | Shisa2 |
| B4galnt3 | 5.972241 | 4.2350210 | 351.8476 | 3.229659e-20 | 3.811246e-17 | B4galnt3 |
| Klhl14 | 5.578961 | -0.4543305 | 160.5617 | 4.888843e-20 | 5.357124e-17 | Klhl14 |
| Bpifb4 | 6.108430 | 0.5093333 | 220.6385 | 1.039515e-19 | 1.052623e-16 | Bpifb4 |
| Gm8882 | 6.732739 | 2.6042303 | 308.8407 | 1.097840e-19 | 1.052623e-16 | Gm8882 |
| Gm9994 | 6.947403 | 2.1213282 | 234.1878 | 2.274457e-19 | 2.052497e-16 | Gm9994 |
| 1700066N21Rik | 6.913182 | 9.6857686 | 308.2725 | 2.529098e-19 | 2.155494e-16 | 1700066N21Rik |
| 4930426D05Rik | 5.755185 | 0.2724439 | 159.0632 | 3.417759e-19 | 2.759571e-16 | 4930426D05Rik |
| Cst10 | 7.015809 | 11.7977784 | 300.9220 | 3.744712e-19 | 2.872381e-16 | Cst10 |
| Foxred2 | 2.849927 | 4.0718214 | 298.4485 | 4.301921e-19 | 3.142656e-16 | Foxred2 |
| Dcpp1 | 5.834276 | 12.7182491 | 290.0334 | 6.806902e-19 | 4.351029e-16 | Dcpp1 |
# Top 20 downregulated genes were displayed
kable(head(downregulated_Par_vs_SM, 20), digits = 50, caption = "Top 20 Downregulated Genes: Par vs SM") %>%
kable_styling("striped", full_width = FALSE, position = "center") %>% scroll_box(width = "80%", height = "500px")
| logFC | logCPM | F | PValue | FDR | Gene_names | |
|---|---|---|---|---|---|---|
| Tlx1 | -5.899304 | 4.9885551 | 520.1044 | 4.720355e-23 | 1.810374e-19 | Tlx1 |
| Liph | -3.668593 | 5.9470991 | 356.7811 | 2.320996e-20 | 3.236945e-17 | Liph |
| Ptprt | -4.100384 | 1.8541562 | 264.3664 | 5.238903e-19 | 3.653182e-16 | Ptprt |
| Gpr155 | -5.056716 | 7.1810574 | 290.7018 | 6.563266e-19 | 4.351029e-16 | Gpr155 |
| Duoxa2 | -6.383686 | 3.6682549 | 288.9538 | 7.995886e-19 | 4.732304e-16 | Duoxa2 |
| Duox2 | -6.184777 | 4.5472542 | 263.2697 | 3.628761e-18 | 1.590538e-15 | Duox2 |
| Kcnj3 | -5.479426 | -0.6783567 | 166.5889 | 4.667624e-18 | 1.935298e-15 | Kcnj3 |
| Dstyk | -1.570262 | 4.8940996 | 244.7219 | 1.036362e-17 | 4.076622e-15 | Dstyk |
| Mal2 | -3.007438 | 4.5782783 | 230.2453 | 2.719818e-17 | 9.482892e-15 | Mal2 |
| Vps33b | -1.239887 | 4.5192099 | 229.4735 | 2.864251e-17 | 9.764550e-15 | Vps33b |
| Crim1 | -1.725672 | 5.6549675 | 226.6257 | 3.484749e-17 | 1.162164e-14 | Crim1 |
| Fmn1 | -2.657240 | 4.7880313 | 213.4424 | 8.906618e-17 | 2.829331e-14 | Fmn1 |
| Ccdc68 | -2.861194 | 3.2773613 | 213.4754 | 9.037040e-17 | 2.829331e-14 | Ccdc68 |
| Ppm1l | -2.253322 | 3.6660778 | 208.3366 | 1.301410e-16 | 3.914692e-14 | Ppm1l |
| Pcp4l1 | -3.479140 | 4.9022820 | 207.4385 | 1.388697e-16 | 4.096923e-14 | Pcp4l1 |
| Arhgef10 | -1.913454 | 6.2555648 | 204.3525 | 1.749485e-16 | 4.970156e-14 | Arhgef10 |
| Esrrg | -4.322322 | 6.1235405 | 203.6277 | 1.850073e-16 | 5.068208e-14 | Esrrg |
| Rnf141 | -1.720475 | 4.3749706 | 193.0026 | 4.225180e-16 | 1.098619e-13 | Rnf141 |
| Pde4d | -1.627661 | 5.0089214 | 188.8787 | 5.879873e-16 | 1.478740e-13 | Pde4d |
| Crlf1 | -3.428918 | 5.3052569 | 181.4193 | 1.093816e-15 | 2.663529e-13 | Crlf1 |
# Upregulated genes were identified following the filtering parameters of logFC ≥ 1 and FDR < 0.05
upregulated_SL_vs_SM <- results_SL_vs_SM[
results_SL_vs_SM$FDR < 0.05 & results_SL_vs_SM$logFC >= 1, ]
# Downregulated genes were identified following the filtering parameters of logFC ≤ 1 and FDR < 0.05
downregulated_SL_vs_SM <- results_SL_vs_SM[
results_SL_vs_SM$FDR < 0.05 & results_SL_vs_SM$logFC <= -1, ]
# Top 20 upregulated genes were displayed
kable(head(upregulated_SL_vs_SM, 20), digits = 50, caption = "Top 20 Upregulated Genes: SL vs SM") %>%
kable_styling("striped", full_width = FALSE) %>% scroll_box(width = "80%", height = "500px")
| logFC | logCPM | F | PValue | FDR | Gene_names | |
|---|---|---|---|---|---|---|
| Galnt4 | 4.157584 | 6.521874 | 1015.6554 | 3.893774e-28 | 5.973438e-24 | Galnt4 |
| Nkx2-3 | 6.850034 | 5.448621 | 910.6768 | 3.157096e-27 | 2.421650e-23 | Nkx2-3 |
| Casc4 | 5.082725 | 5.804160 | 843.9180 | 9.989003e-27 | 4.991333e-23 | Casc4 |
| Sox2 | 6.084940 | 4.965056 | 838.5557 | 1.301436e-26 | 4.991333e-23 | Sox2 |
| Vwf | 4.118477 | 6.334989 | 803.0749 | 2.353816e-26 | 7.221979e-23 | Vwf |
| Bace2 | 3.945412 | 5.950228 | 780.6508 | 3.852107e-26 | 9.849197e-23 | Bace2 |
| Syt7 | 6.328771 | 6.268689 | 770.2607 | 4.951868e-26 | 1.085237e-22 | Syt7 |
| Hpse2 | 6.418600 | 2.049215 | 588.9173 | 1.138432e-25 | 2.057243e-22 | Hpse2 |
| Galnt12 | 5.242333 | 6.494604 | 730.9068 | 1.206909e-25 | 2.057243e-22 | Galnt12 |
| Tmco3 | 3.554465 | 7.062339 | 712.4616 | 1.874623e-25 | 2.875859e-22 | Tmco3 |
| Isl2 | 6.382053 | 3.165308 | 659.0448 | 5.797170e-25 | 8.084945e-22 | Isl2 |
| Fgl2 | 4.696106 | 6.467270 | 652.5791 | 8.537817e-25 | 1.026436e-21 | Fgl2 |
| Dnajc10 | 4.780718 | 8.340781 | 651.8237 | 8.698039e-25 | 1.026436e-21 | Dnajc10 |
| Tgoln1 | 2.822574 | 7.501807 | 586.2337 | 5.372613e-24 | 5.887233e-21 | Tgoln1 |
| Bcan | 6.328490 | 2.885365 | 556.0463 | 1.126810e-23 | 1.152426e-20 | Bcan |
| Foxp2 | 5.290392 | 2.163991 | 559.4940 | 1.582571e-23 | 1.517389e-20 | Foxp2 |
| Tmc5 | 6.253124 | 6.156531 | 531.2174 | 2.980537e-23 | 2.689672e-20 | Tmc5 |
| B3gnt6 | 6.822907 | 7.325964 | 510.0197 | 5.822527e-23 | 4.962411e-20 | B3gnt6 |
| Kcne1 | 5.814247 | 6.287985 | 488.3988 | 1.222004e-22 | 9.866717e-20 | Kcne1 |
| Galnt6 | 4.243983 | 5.412865 | 484.9055 | 1.370746e-22 | 1.051430e-19 | Galnt6 |
# Top 20 downregulated genes were displayed
kable(head(downregulated_SL_vs_SM, 20), digits = 50, caption = "Top 20 Downregulated Genes: SL vs SM") %>%
kable_styling("striped", full_width = FALSE) %>% scroll_box(width = "80%", height = "500px")
| logFC | logCPM | F | PValue | FDR | Gene_names | |
|---|---|---|---|---|---|---|
| Srsf10 | -1.351846 | 6.6333619 | 249.8339 | 7.460464e-18 | 2.043767e-15 | Srsf10 |
| Ttc7 | -2.333207 | 5.8326110 | 215.9233 | 7.431902e-17 | 1.443200e-14 | Ttc7 |
| Ampd2 | -1.713203 | 4.1361510 | 196.7664 | 3.138833e-16 | 5.234005e-14 | Ampd2 |
| Fam53b | -1.483525 | 5.1604965 | 191.4929 | 4.762507e-16 | 7.455268e-14 | Fam53b |
| Ttll12 | -1.298845 | 3.5230415 | 159.1228 | 7.812595e-15 | 7.990201e-13 | Ttll12 |
| Slc25a12 | -1.473749 | 5.7097686 | 154.9362 | 1.159215e-14 | 1.111470e-12 | Slc25a12 |
| Dleu7 | -6.190975 | -0.1335977 | 113.5056 | 1.422493e-14 | 1.330638e-12 | Dleu7 |
| Kcnj3 | -3.295787 | -0.6783567 | 109.4816 | 1.518714e-14 | 1.386821e-12 | Kcnj3 |
| Ero1lb | -2.429578 | 6.6603718 | 151.3293 | 1.641210e-14 | 1.481047e-12 | Ero1lb |
| Rcan2 | -2.985086 | 2.5220846 | 142.2988 | 4.082663e-14 | 3.245189e-12 | Rcan2 |
| Mid2 | -2.674937 | 3.7878651 | 138.9612 | 5.709815e-14 | 4.357924e-12 | Mid2 |
| Hepacam2 | -2.558095 | 3.1546787 | 135.6365 | 8.124405e-14 | 5.851479e-12 | Hepacam2 |
| Agtpbp1 | -1.330973 | 3.6246609 | 134.9155 | 8.744998e-14 | 6.239861e-12 | Agtpbp1 |
| Tfeb | -1.416673 | 4.7292016 | 131.0261 | 1.330074e-13 | 9.068743e-12 | Tfeb |
| Oit1 | -3.093703 | 7.1624354 | 130.7650 | 1.368571e-13 | 9.168230e-12 | Oit1 |
| BC017612 | -2.366891 | 6.8426447 | 130.2717 | 1.444579e-13 | 9.511283e-12 | BC017612 |
| Mpp6 | -1.569008 | 4.8724220 | 126.2015 | 2.270727e-13 | 1.433548e-11 | Mpp6 |
| Derl3 | -3.063186 | 7.5107406 | 125.2139 | 2.538426e-13 | 1.585946e-11 | Derl3 |
| Adrb1 | -3.426786 | 5.0246723 | 124.5096 | 2.750620e-13 | 1.701503e-11 | Adrb1 |
| Adtrp | -4.098484 | 4.3284122 | 122.5405 | 3.458436e-13 | 2.061027e-11 | Adtrp |
DEG counts:
Par vs SL: Number of Upregulated Genes: 2483
Par vs SL: Number of Downregulated Genes: 1454
Par vs SM: Number of Upregulated Genes: 2135
Par vs SM: Number of Downregulated Genes: 1027
SL vs SM: Number of Upregulated Genes: 1082
SL vs SM: Number of Downregulated Genes: 1082
8) Vizualization plots
# Defined thresholds for significance
fdr_threshold <- 0.05
logfc_threshold <- 1
# Selected the results for Par vs SL comparison
top_genes_Par_vs_SL <- results_Par_vs_SL
# Classified genes based on logFC and FDR
top_genes_Par_vs_SL$Significance <- "Not Significant"
top_genes_Par_vs_SL$Significance[top_genes_Par_vs_SL$logFC >= logfc_threshold & top_genes_Par_vs_SL$FDR < fdr_threshold] <- "Upregulated"
top_genes_Par_vs_SL$Significance[top_genes_Par_vs_SL$logFC <= -logfc_threshold & top_genes_Par_vs_SL$FDR < fdr_threshold] <- "Downregulated"
# Created an MA plot, colored the plot as per significance determined by the logfc and FDR thresholds
ggplot(top_genes_Par_vs_SL, aes(x = logCPM, y = logFC)) +
geom_point(aes(color = Significance), alpha = 0.6, size = 1.5) +
scale_color_manual(values = c("Not Significant" = "grey",
"Upregulated" = "purple",
"Downregulated" = "pink")) + # Custom colors for categories
labs(title = "MA Plot - Par vs SM",
x = "Average Log CPM",
y = "Log Fold Change") +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.title = element_text(size = 14),
legend.title = element_blank(),
legend.position = "top",
legend.text = element_text(size = 12)
) +
geom_hline(yintercept = 0, linetype = "dashed", color = "black")
# Create the volcano plot with adjusted p-values (FDR) using EnhancedVolcano function from the EnhancedVolcano package
EnhancedVolcano(results_Par_vs_SM,
lab = results_Par_vs_SM$Gene_names, # Labels for each gene
x = 'logFC', # X-axis: log fold change
y = 'FDR', # Y-axis: FDR (adjusted p-values)
pCutoff = 0.05, # Cutoff for FDR significance
FCcutoff = 1, # Fold change cutoff
title = 'Volcano Plot (Par vs SM)', # Plot title
xlim = c(-10, 10), # Adjust logFC range if needed
ylim = c(0, -log10(min(results_Par_vs_SM$FDR, na.rm=TRUE))), # Adjust FDR y-axis
pointSize = 1.0, # Point size for each gene
labSize = 3.0 # Label size
)
# Calculated normalized counts are stored in a variable normCounts
normCounts <- cpm(y, normalized.lib.sizes = TRUE, log = TRUE)
# From the topTags table, Filtered significant genes for SL vs SM comparison are stored in sigGenes_SL_vs_SM
sigGenes_SL_vs_SM <- topTags_SL_vs_SM$table
sigGenes_SL_vs_SM <- sigGenes_SL_vs_SM[sigGenes_SL_vs_SM$FDR < 0.05, ]
sigGenes_SL_vs_SM <- sigGenes_SL_vs_SM[order(sigGenes_SL_vs_SM$FDR), ]
# Selected relevant samples for the comparison
selected_samples_SL_vs_SM <- c("SL_1A_Mal", "SL_1B_Mal", "SL_2A_Mal", "SL_2B_Mal", "SL_3A_Mal", "SL_3B_Mal", "SL_4A_Fem", "SL_4B_Fem", "SL_5A_Fem", "SL_5B_Fem", "SL_6A_Fem", "SL_6B_Fem","SM_1A_Mal", "SM_1B_Mal", "SM_2A_Mal", "SM_2B_Mal", "SM_3A_Mal", "SM_3B_Mal", "SM_4A_Fem", "SM_4B_Fem", "SM_5A_Fem", "SM_5B_Fem", "SM_6A_Fem", "SM_6B_Fem")
# Selecting our relevant samples from normCounts table and storing in another variable normCounts2
normCounts2 <- normCounts[, selected_samples_SL_vs_SM]
# Filtered to include only significant genes for SL_vs_SM
normCounts2 <- normCounts2[rownames(normCounts2) %in% rownames(sigGenes_SL_vs_SM), ]
# Selected top 50 significant genes for visualization
topSigGenes_SL_vs_SM <- head(normCounts2, 50)
# Defined strain annotation for SL and SM samples
strain_info_SL_vs_SM <- data.frame(
Strain = c(rep("SL", 12), rep("SM", 12))
)
#Making the selected relevant samples as rownames
rownames(strain_info_SL_vs_SM) <- selected_samples_SL_vs_SM
# Defined colors for the strain annotation
strain_colors_SL_vs_SM <- list(Strain = c("SL" = "palegreen", "SM" = "paleturquoise1"))
# Generated the heatmap for the top significant genes for SL vs SM by using the function pheatmap from pheatmap package
pheatmap(topSigGenes_SL_vs_SM, scale = 'row', show_rownames = TRUE, treeheight_row = F, treeheight_col = 0, cluster_rows = TRUE, cluster_cols = FALSE, fontsize_row = 5, labels_col = selected_samples_SL_vs_SM, annotation_col = strain_info_SL_vs_SM, annotation_colors = strain_colors_SL_vs_SM, main = "Heatmap of SL vs SM (Top 50 genes)")
9) Pathway analysis
Gene Ontology: BP, CC, MF of Par vs SL
# Extracted the gene names from upregulated genes list and stored in a variable 'upregulated_genes_pathway_Par_vs_SL'
upregulated_genes_pathway_Par_vs_SL <- rownames(upregulated_Par_vs_SL)
# Extracted all genes tested in the differential expression analysis
all_genes_pathway_Par_vs_SL <- rownames(results_Par_vs_SL)
# Convert gene symbols to Entrez IDs using org.Mm.eg.db
upregulated_entrez_Par_vs_SL <- bitr(upregulated_genes_pathway_Par_vs_SL, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)$ENTREZID
all_entrez_Par_vs_SL <- bitr(all_genes_pathway_Par_vs_SL, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)$ENTREZID
# Run GO enrichment analysis for each category (BP, CC, MF) by using enrichGO
ego_bp_Par_vs_SL <- enrichGO(
gene = upregulated_entrez_Par_vs_SL,
universe = all_entrez_Par_vs_SL,
keyType = "ENTREZID",
OrgDb = org.Mm.eg.db,
ont = "BP",
pAdjustMethod = "BH",
qvalueCutoff = 0.05,
readable = TRUE
)
ego_cc_Par_vs_SL <- enrichGO(
gene = upregulated_entrez_Par_vs_SL,
universe = all_entrez_Par_vs_SL,
keyType = "ENTREZID",
OrgDb = org.Mm.eg.db,
ont = "CC",
pAdjustMethod = "BH",
qvalueCutoff = 0.05,
readable = TRUE
)
ego_mf_Par_vs_SL <- enrichGO(
gene = upregulated_entrez_Par_vs_SL,
universe = all_entrez_Par_vs_SL,
keyType = "ENTREZID",
OrgDb = org.Mm.eg.db,
ont = "MF",
pAdjustMethod = "BH",
qvalueCutoff = 0.05,
readable = TRUE
)
# Prepared the results for plotting by selecting top 15 terms for each category
top_bp_Par_vs_SL <- ego_bp_Par_vs_SL@result %>%
arrange(p.adjust) %>%
top_n(15, wt = -pvalue) %>%
mutate(Category = "Biological Process", logPvalue = -log10(pvalue))
top_cc_Par_vs_SL <- ego_cc_Par_vs_SL@result %>%
arrange(p.adjust) %>%
top_n(15, wt = -pvalue) %>%
mutate(Category = "Cellular Component", logPvalue = -log10(pvalue))
top_mf_Par_vs_SL <- ego_mf_Par_vs_SL@result %>%
arrange(p.adjust) %>%
top_n(15, wt = -pvalue) %>%
mutate(Category = "Molecular Function", logPvalue = -log10(pvalue))
combined_results_Par_vs_SL <- bind_rows(top_bp_Par_vs_SL, top_cc_Par_vs_SL, top_mf_Par_vs_SL)
# Colorblind-friendly palette
cb_palette <- brewer.pal(n = 3, name = "Set2")
# Vizualized combined data in the form of barplots
ggplot(combined_results_Par_vs_SL, aes(x = reorder(Description, pvalue), y = logPvalue, fill = Category)) +
geom_bar(stat = "identity") +
facet_wrap(~Category, scales = "free") +
theme_minimal() +
scale_fill_manual(values = cb_palette) +
labs(
x = "GO Term",
y = "-log10(p-value)",
title = "Top 15 Gene Function Classification (GO) for Upregulated Genes in Par vs SL"
) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1, size = 8),
legend.title = element_blank(),
legend.position = "bottom",
plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
) +
guides(fill = guide_legend(reverse = TRUE))
# Extracted the gene names from downregulated genes list and stored in a variable 'downregulated_genes_pathway_Par_vs_SL'
downregulated_genes_pathway_Par_vs_SL <- rownames(downregulated_Par_vs_SL)
# Convert gene symbols to Entrez IDs using org.Mm.eg.db
downregulated_entrez_Par_vs_SL <- bitr(downregulated_genes_pathway_Par_vs_SL, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)$ENTREZID
all_entrez_Par_vs_SL <- bitr(all_genes_pathway_Par_vs_SL, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)$ENTREZID
# Run GO enrichment analysis for each category (BP, CC, MF) by using enrichGO
ego_bp_down_Par_vs_SL <- enrichGO(
gene = downregulated_entrez_Par_vs_SL,
universe = all_entrez_Par_vs_SL,
keyType = "ENTREZID",
OrgDb = org.Mm.eg.db,
ont = "BP",
pAdjustMethod = "BH",
qvalueCutoff = 0.05,
readable = TRUE
)
ego_cc_down_Par_vs_SL <- enrichGO(
gene = downregulated_entrez_Par_vs_SL,
universe = all_entrez_Par_vs_SL,
keyType = "ENTREZID",
OrgDb = org.Mm.eg.db,
ont = "CC",
pAdjustMethod = "BH",
qvalueCutoff = 0.05,
readable = TRUE
)
ego_mf_down_Par_vs_SL <- enrichGO(
gene = downregulated_entrez_Par_vs_SL,
universe = all_entrez_Par_vs_SL,
keyType = "ENTREZID",
OrgDb = org.Mm.eg.db,
ont = "MF",
pAdjustMethod = "BH",
qvalueCutoff = 0.05,
readable = TRUE
)
# Prepared the results for plotting
top_bp_down_Par_vs_SL <- ego_bp_down_Par_vs_SL@result %>%
arrange(p.adjust) %>%
top_n(15, wt = -pvalue) %>%
mutate(Category = "Biological Process", logPvalue = -log10(pvalue))
top_cc_down_Par_vs_SL <- ego_cc_down_Par_vs_SL@result %>%
arrange(p.adjust) %>%
top_n(15, wt = -pvalue) %>%
mutate(Category = "Cellular Component", logPvalue = -log10(pvalue))
top_mf_down_Par_vs_SL <- ego_mf_down_Par_vs_SL@result %>%
arrange(p.adjust) %>%
top_n(15, wt = -pvalue) %>%
mutate(Category = "Molecular Function", logPvalue = -log10(pvalue))
combined_results_down_Par_vs_SL <- bind_rows(top_bp_down_Par_vs_SL, top_cc_down_Par_vs_SL, top_mf_down_Par_vs_SL)
# Vizualized combined data in the form of barplots
ggplot(combined_results_down_Par_vs_SL, aes(x = reorder(Description, pvalue), y = logPvalue, fill = Category)) +
geom_bar(stat = "identity") +
facet_wrap(~Category, scales = "free") +
theme_minimal() +
scale_fill_manual(values = cb_palette) +
labs(
x = "GO Term",
y = "-log10(p-value)",
title = "Top 15 Gene Function Classification (GO) for Downregulated Genes in Par vs SL"
) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 1, size = 8),
legend.title = element_blank(),
legend.position = "bottom",
plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
) +
guides(fill = guide_legend(reverse = TRUE))
KEGG Pathway: Par vs SM
# Extracted the gene names from upregulated genes list and stored in a variable 'upregulated_genes_pathway_Par_vs_SM'
upregulated_genes_pathway_Par_vs_SM <- rownames(upregulated_Par_vs_SM)
# Converted gene symbols to Entrez IDs using org.Mm.eg.db
upregulated_entrez_Par_vs_SM <- bitr(upregulated_genes_pathway_Par_vs_SM, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)$ENTREZID
# Run KEGG pathway enrichment analysis using enrichKEGG
kegg_up_Par_vs_SM <- enrichKEGG(
gene = upregulated_entrez_Par_vs_SM,
organism = 'mmu', # Mouse KEGG pathway
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
# Created the dotplot
dotplot <- dotplot(kegg_up_Par_vs_SM , showCategory = 20) + ggtitle("KEGG: Par vs SM upregulated genes")
# Adjusted font sizes for better readability
dotplot + theme(
axis.text.y = element_text(size = 6.5), # Y-axis text size
)
# Extracted the gene names from downregulated genes list and stored in a variable 'downregulated_genes_pathway_Par_vs_SM'
downregulated_genes_pathway_Par_vs_SM <- rownames(downregulated_Par_vs_SM)
# Converted gene symbols to Entrez IDs using org.Mm.eg.db
downregulated_entrez_Par_vs_SM <- bitr(downregulated_genes_pathway_Par_vs_SM, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)$ENTREZID
# Run KEGG pathway enrichment analysis using enrichKEGG
kegg_down_Par_vs_SM <- enrichKEGG(
gene = downregulated_entrez_Par_vs_SM,
organism = 'mmu', # Mouse KEGG pathway (use 'hsa' for human)
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
# Create the dotplot
dotplot1 <- dotplot(kegg_down_Par_vs_SM , showCategory = 20) + ggtitle("KEGG: Par vs SM Downregulated genes")
# Adjust font sizes using theme()
dotplot1 + theme(
axis.text.y = element_text(size = 6.5), # Y-axis text size
)
Reactome Pathway: SL vs SM
# Extracted the gene names from upregulated genes list and stored in a variable 'upregulated_genes_pathway_SL_vs_SM'
upregulated_genes_pathway_SL_vs_SM <- rownames(upregulated_SL_vs_SM)
# Converted gene symbols to Entrez IDs using org.Mm.eg.db
upregulated_entrez_SL_vs_SM <- bitr(upregulated_genes_pathway_SL_vs_SM, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)$ENTREZID
# Performed reactome pathway enrichment analysis using enrichPathway
reactome_up_SL_vs_SM <- enrichPathway(
gene = upregulated_entrez_SL_vs_SM,
organism = 'mouse', # Use 'human' for human data
pAdjustMethod = "BH",
qvalueCutoff = 0.05,
readable = TRUE
)
# Calculated pairwise similarities between enriched terms
reactome_up_SL_vs_SM1 <- pairwise_termsim(reactome_up_SL_vs_SM)
# Created a tree plot to visualize the enrichment results
treeplot <- treeplot(reactome_up_SL_vs_SM1) + ggtitle("Reactome Pathway Enrichment (Upregulated Genes in SL vs SM)")
#Printing the plot
print(treeplot)
- SL vs SM: Downregulated Genes
# Extracted the gene names from downregulated genes list and stored in a variable 'downregulated_genes_pathway_SL_vs_SM'
downregulated_genes_pathway_SL_vs_SM <- rownames(downregulated_SL_vs_SM)
# Converted gene symbols to Entrez IDs using org.Mm.eg.db
downregulated_entrez_SL_vs_SM <- bitr(downregulated_genes_pathway_SL_vs_SM, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Mm.eg.db)$ENTREZID
# Run Reactome pathway enrichment analysis using enrichPathway
reactome_down_SL_vs_SM <- enrichPathway(
gene = downregulated_entrez_SL_vs_SM,
organism = 'mouse', # Use 'human' for human data
pAdjustMethod = "BH",
qvalueCutoff = 0.05,
readable = TRUE
)
# Calculated pairwise similarities between enriched terms
reactome_down_SL_vs_SM1 <- pairwise_termsim(reactome_down_SL_vs_SM)
# Created a tree plot to visualize the enrichment results
treeplot1 <- treeplot(reactome_down_SL_vs_SM1) + ggtitle("Reactome Pathway Enrichment (Downregulated Genes in SL vs SM)")
# Printing the plot
print(treeplot1)
10. Gene Set Enrichment Analysis
Preparing Gene Sets using Hallmark gene sets from MSigDB
# Retrieved Hallmark gene sets for Mus musculus
msig_hallmark <- msigdbr(species = "Mus musculus", category = "H")
# Prepared TERM2GENE as a data frame with two columns: "term" and "gene"
hallmark_gene_sets <- msig_hallmark %>%
dplyr::select(gs_name, entrez_gene) %>%
dplyr::rename(term = gs_name, gene = entrez_gene)
Generating Ranked Gene Lists
# Created a function to generate ranked gene lists
create_ranked_list <- function(de_results, gene_column = "Gene_names", logfc_column = "logFC") {
# Ensured gene symbols are unique
de_results <- de_results[!duplicated(de_results[[gene_column]]), ]
# Removed any NA values
de_results <- de_results[!is.na(de_results[[logfc_column]]), ]
# Converted gene symbols to Entrez IDs
gene_entrez <- bitr(de_results[[gene_column]],
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Mm.eg.db)
# Merged with DE results
de_results <- merge(de_results, gene_entrez, by.x = gene_column, by.y = "SYMBOL")
# Removed any genes that failed to map
de_results <- de_results[!is.na(de_results$ENTREZID), ]
# Created a named vector for ranking
ranked_list <- de_results$logFC
names(ranked_list) <- de_results$ENTREZID
# Removed duplicates and sort in decreasing order
ranked_list <- ranked_list[!duplicated(names(ranked_list))]
ranked_list <- sort(ranked_list, decreasing = TRUE)
return(ranked_list)
}
Performing GSEA for Each Comparison
# Created ranked list for Par_vs_SL
ranked_Par_vs_SL <- create_ranked_list(results_Par_vs_SL)
# Run GSEA using clusterProfiler's GSEA function
gsea_Par_vs_SL <- GSEA(
ranked_Par_vs_SL,
TERM2GENE = hallmark_gene_sets,
pvalueCutoff = 0.05,
eps = 0, # Set eps to zero for better estimation
verbose = FALSE
)
# Extracted pathways with P-value == 1e-10 (extremely significant)
extremely_significant_Par_vs_SL <- gsea_Par_vs_SL@result %>%
filter(pvalue == 1e-10)
# Converted GSEA results to a data frame
gsea_results_df_Par_vs_SL <- as.data.frame(gsea_Par_vs_SL)
# Combineed top 15 pathways with extremely significant ones
combined_top_Par_vs_SL <- gsea_results_df_Par_vs_SL %>%
arrange(p.adjust) %>%
head(15) %>%
bind_rows(extremely_significant_Par_vs_SL)
# Removed potential duplicates based on Description
combined_top_Par_vs_SL <- combined_top_Par_vs_SL %>%
distinct(Description, .keep_all = TRUE)
# Created a bar plot for Par_vs_SL
ggplot(combined_top_Par_vs_SL, aes(x = reorder(Description, NES), y = NES, fill = NES)) +
geom_bar(stat = "identity") +
coord_flip() +
scale_fill_gradient(low = "lightblue", high = "lightcoral") +
theme_minimal() +
labs(title = "Top Enriched Gene Sets - Par_vs_SL",
x = "Gene Set",
y = "Normalized Enrichment Score (NES)") +
theme(
plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.text = element_text(size = 12)
)
# Displayed the top 20 GSEA results in a scrollable table
kable(head(gsea_Par_vs_SL, 20), digits=50) %>% kable_styling("striped", full_width = FALSE, position = "center") %>% scroll_box(width = "100%", height = "500px")
| ID | Description | setSize | enrichmentScore | NES | pvalue | p.adjust | qvalue | rank | leading_edge | core_enrichment | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| HALLMARK_ALLOGRAFT_REJECTION | HALLMARK_ALLOGRAFT_REJECTION | HALLMARK_ALLOGRAFT_REJECTION | 172 | 0.7177226 | 2.249958 | 1.004867e-19 | 5.024334e-18 | 3.490590e-18 | 1760 | tags=51%, list=13%, signal=45% | 12525/12526/12518/15002/12500/12502/12503/16428/12501/16818/15001/12504/84544/66211/20299/55985/50931/12481/22637/20304/16186/12487/16678/21939/20849/16170/17329/23871/19264/26411/12516/16994/14423/17395/50723/22376/14537/16185/16408/18646/16643/16364/21838/16822/19073/56501/12766/12524/12010/17972/16149/14960/20452/12479/12519/18751/16174/16161/226419/15163/14938/16190/21897/16176/14191/16196/17076/79221/12703/12263/15170/16184/18636/16414/12444/21354/17084/15007/16173/12035/14130/17086/14998/14102/15976/14744/15900/21803 |
| HALLMARK_INTERFERON_GAMMA_RESPONSE | HALLMARK_INTERFERON_GAMMA_RESPONSE | HALLMARK_INTERFERON_GAMMA_RESPONSE | 187 | 0.6304543 | 1.989881 | 4.685858e-12 | 1.171464e-10 | 8.138595e-11 | 2306 | tags=48%, list=16%, signal=41% | 71939/242248/14990/110168/12515/20304/12984/16421/21939/20849/12265/17329/76074/434341/17472/16185/16364/236573/16822/60533/108670/12524/12010/16149/22329/626578/209086/14960/20452/14969/12363/12702/16154/16913/14938/16190/16196/75345/246727/18578/27056/12703/15170/20344/16912/12369/15945/12258/229003/21928/27007/21354/14962/15007/11861/109032/66892/19225/14998/14102/246728/70082/15976/15900/667277/21930/12362/18712/55932/14964/58203/56791/65972/234311/20306/15894/56045/14528/22035/16963/15959/21929/74481/22271/54123/12575/19171/20296/15958/209588 |
| HALLMARK_MYOGENESIS | HALLMARK_MYOGENESIS | HALLMARK_MYOGENESIS | 176 | 0.6055251 | 1.904287 | 1.692797e-09 | 2.821328e-08 | 1.960081e-08 | 1982 | tags=37%, list=14%, signal=32% | 17879/19293/21957/17884/76722/21925/17901/59011/12715/11537/17930/11474/14077/11459/11937/21953/17907/15464/12862/12372/21393/13346/11472/20190/19309/17189/56012/24053/24131/14199/13009/20856/14778/20249/17929/24052/17896/11489/16511/16985/16404/12865/13808/12491/17260/23796/19245/56188/29817/17873/11568/16009/18563/18128/11636/16000/21803/12842/226251/12759/17882/11423/69253/52250/227753 |
| HALLMARK_PROTEIN_SECRETION | HALLMARK_PROTEIN_SECRETION | HALLMARK_PROTEIN_SECRETION | 94 | -0.5899617 | -2.255769 | 1.941524e-08 | 2.426906e-07 | 1.686061e-07 | 3709 | tags=64%, list=26%, signal=47% | 16561/22319/56418/211673/211286/15893/71770/20844/56041/11774/107338/22088/11487/20479/17113/99371/100226/67830/22365/72736/271457/59042/108664/208449/99889/12757/213827/14420/16784/11840/19334/59021/11765/67074/56334/54214/20333/320634/11605/67300/20619/16004/107767/76332/26951/12512/69162/70349/20404/53331/12725/11977/50797/16668/21985/69608/77929/70361/18484/71943 |
| HALLMARK_INFLAMMATORY_RESPONSE | HALLMARK_INFLAMMATORY_RESPONSE | HALLMARK_INFLAMMATORY_RESPONSE | 171 | 0.5631777 | 1.765002 | 1.018982e-06 | 1.018982e-05 | 7.079240e-06 | 2636 | tags=46%, list=19%, signal=38% | 17167/12775/20343/16818/20299/12515/20304/56696/21939/16197/17329/269346/19419/68713/321019/50723/21942/80901/16185/16182/16822/54483/18413/20339/16174/56221/54371/19734/16154/11988/12506/16190/21897/16176/16992/23796/18578/320207/21338/50778/50498/15117/19217/12986/15945/14676/21938/23845/19219/20511/16173/13732/17869/21930/54598/380713/15465/20306/15894/14528/22035/13058/13136/11303/14723/54123/16878/12575/56212/20296/16323/18414/20354/216799/20266/69550/54216/64008/11541 |
| HALLMARK_ANDROGEN_RESPONSE | HALLMARK_ANDROGEN_RESPONSE | HALLMARK_ANDROGEN_RESPONSE | 107 | -0.4800640 | -1.875840 | 1.721447e-05 | 1.434539e-04 | 9.966273e-05 | 2403 | tags=42%, list=17%, signal=35% | 109711/56228/74754/20393/56847/56376/20229/229055/319554/12007/71817/21807/66884/53416/15357/17761/17289/16691/19341/107652/16410/13714/21985/14595/69608/70361/231070/109785/13521/54608/74205/16617/16613/16623/16622/16615/18048/16619/12167/18050/16616/13648/13646/16624/30051 |
| HALLMARK_IL6_JAK_STAT3_SIGNALING | HALLMARK_IL6_JAK_STAT3_SIGNALING | HALLMARK_IL6_JAK_STAT3_SIGNALING | 80 | 0.5908909 | 1.732533 | 2.274794e-04 | 1.421746e-03 | 9.877396e-04 | 2540 | tags=45%, list=18%, signal=37% | 55985/12984/16186/16199/17329/16994/16182/14825/12491/12702/16161/16190/16176/16196/16178/320207/12703/50498/16184/12986/15945/21938/12804/14102/21803/18712/20306/16172/11482/56744/18414/12494/20846/54635/16401/16188 |
| HALLMARK_TNFA_SIGNALING_VIA_NFKB | HALLMARK_TNFA_SIGNALING_VIA_NFKB | HALLMARK_TNFA_SIGNALING_VIA_NFKB | 183 | 0.4990590 | 1.574185 | 2.165493e-04 | 1.421746e-03 | 9.877396e-04 | 2439 | tags=37%, list=17%, signal=31% | 13537/12515/20304/13655/16197/22029/12522/16598/14282/321019/50723/21942/20621/20527/18037/170768/14825/19288/12519/17681/55991/12702/12044/12156/70789/16176/13654/18578/14281/56193/227659/13653/19696/15945/21928/14373/21354/19219/17873/16173/56336/15939/56708/19698/16449/19225/17869/21930/106869/11796/18124/15894/14528/21664/104681/21929/11303/16878/12575/14579/20296/15958/16323/99929/240672/57783/75234 |
| HALLMARK_KRAS_SIGNALING_UP | HALLMARK_KRAS_SIGNALING_UP | HALLMARK_KRAS_SIGNALING_UP | 181 | 0.5044412 | 1.588134 | 2.564028e-04 | 1.424460e-03 | 9.896247e-04 | 2464 | tags=41%, list=18%, signal=34% | 12683/57264/12493/16186/22778/16625/16197/12767/23871/22029/26411/57875/67468/19699/17395/20440/20255/94176/16792/18613/71683/19734/21788/16154/18671/16913/12156/23882/16176/67888/79221/18080/19092/66066/19279/15945/14373/16414/12444/21938/84094/12349/14962/20394/16009/16574/19225/11732/15900/56743/17385/93695/15483/74145/19662/11796/12142/104156/21929/14396/14257/13040/16878/13805/16398/83397/72309/18186/16323/18933/73149/16534/20266/77125 |
| HALLMARK_COMPLEMENT | HALLMARK_COMPLEMENT | HALLMARK_COMPLEMENT | 179 | 0.4985989 | 1.569030 | 3.341847e-04 | 1.670923e-03 | 1.160852e-03 | 3115 | tags=44%, list=22%, signal=34% | 12902/16818/11472/20304/27226/19419/22376/210293/16822/20255/14360/14825/14710/14462/12363/12491/22762/12266/14938/70789/80906/110197/13036/14069/320207/70574/14067/12263/56193/14702/16912/14696/12369/12258/21417/238130/12349/14962/17436/12569/667277/12362/18712/16923/12759/14268/380921/20701/11812/18132/13032/13136/21929/14723/13040/54123/21789/11808/56212/72269/12259/240672/13033/14939/20196/12262/12608/208650/17096/16612/13660/71753/14571/14678/14061/22074/15378/27361 |
| HALLMARK_E2F_TARGETS | HALLMARK_E2F_TARGETS | HALLMARK_E2F_TARGETS | 192 | 0.4900602 | 1.546815 | 4.935926e-04 | 2.243603e-03 | 1.558714e-03 | 4439 | tags=50%, list=32%, signal=35% | 19139/20135/21973/11799/17345/76131/218977/26934/12442/229841/70099/73804/20877/66570/12447/21853/16571/72391/12580/17121/17215/15366/67629/17869/66929/105988/17218/107995/54141/52276/12236/14793/70218/110033/18817/108961/22042/15201/53605/13178/16906/17089/12575/12704/18969/18973/19362/69270/67196/16765/67241/12021/22154/54124/12534/14056/17219/72107/17217/21877/69639/15361/22059/20937/19183/17216/12190/98386/53892/114714/50927/60364/17220/12531/66422/17279/97165/17865/11991/66197/66403/17768/18813/103733/78833/67134/18971/66442/30939/16647/13006/100336/69716/110052/100710/21335 |
| HALLMARK_IL2_STAT5_SIGNALING | HALLMARK_IL2_STAT5_SIGNALING | HALLMARK_IL2_STAT5_SIGNALING | 182 | 0.4669871 | 1.471813 | 2.235579e-03 | 9.314911e-03 | 6.471412e-03 | 2253 | tags=35%, list=16%, signal=29% | 20343/15985/54167/74734/13011/22029/12522/16407/16994/21942/16185/20527/16364/16182/384009/13813/12524/13808/19734/19228/22163/16154/18671/12156/12506/16190/14256/16178/18550/381319/12703/11492/100604/20344/19217/16184/15945/12444/21938/12349/18619/12447/11861/17873/12043/23834/14064/17988/20947/14744/170743/17869/15900/214547/18712/55932/21936/22035/21664/13601/11596/16878/17760 |
| HALLMARK_G2M_CHECKPOINT | HALLMARK_G2M_CHECKPOINT | HALLMARK_G2M_CHECKPOINT | 186 | 0.4506709 | 1.421167 | 6.322301e-03 | 2.431654e-02 | 1.689360e-02 | 3770 | tags=43%, list=27%, signal=32% | 12615/21973/11799/68612/17345/72119/16551/12235/71819/108907/26934/12442/78733/229841/70099/73804/20877/67052/242705/16571/12545/18005/72391/12580/23834/17215/12428/15366/17536/17869/105988/17218/108000/107995/21803/209737/12449/22137/110033/18817/67141/16906/52033/13555/19366/18969/18973/240641/16765/20539/12021/26909/54124/12534/233406/14056/22036/16319/73086/17219/60406/15361/20937/17216/12190/98386/11987/12443/50927/12531/17765/27221/17118/19650/238247/12544/17865/27214/11991/66197 |
| HALLMARK_ADIPOGENESIS | HALLMARK_ADIPOGENESIS | HALLMARK_ADIPOGENESIS | 192 | 0.4484965 | 1.415624 | 7.816571e-03 | 2.791632e-02 | 1.939450e-02 | 3818 | tags=38%, list=27%, signal=28% | 12683/11450/57264/11770/16846/77037/19016/19218/246747/57875/14778/13120/18606/17001/170768/16404/12491/12266/20509/16890/16956/20618/18405/17436/11816/20887/11861/12580/52538/170439/67834/66205/103172/11303/13350/27376/51798/23945/11364/14085/13850/109019/13602/14792/209378/21881/15979/12908/14571/68349/20778/66445/20850/27047/15929/26379/12896/11429/74185/11363/235339/110826/13476/13830/67460/22628/13382/15107/12053/27395/66142/66447/12974 |
| HALLMARK_APICAL_JUNCTION | HALLMARK_APICAL_JUNCTION | HALLMARK_APICAL_JUNCTION | 172 | 0.4465108 | 1.399748 | 1.194616e-02 | 3.982052e-02 | 2.766478e-02 | 2527 | tags=31%, list=18%, signal=26% | 69165/11474/11459/18175/11472/23912/18417/19354/22029/19264/14086/12741/15896/17395/21838/14026/60533/16511/12524/22329/320840/269116/23792/18613/20963/83964/60363/68794/54419/17698/73341/94332/15898/239857/21810/23797/13507/12798/12490/26412/245537/12805/20970/11548/15894/104099/16398/17390/105827/232975/23794/64540/16367 |
| HALLMARK_APOPTOSIS | HALLMARK_APOPTOSIS | HALLMARK_APOPTOSIS | 153 | 0.4426968 | 1.378980 | 1.596565e-02 | 4.989267e-02 | 3.466228e-02 | 2425 | tags=27%, list=17%, signal=22% | 16842/12481/12515/13655/14778/18796/380684/18646/58801/12363/12156/16176/21973/12257/12122/12369/16012/12444/14676/20515/21354/17873/235180/16173/14102/24014/11732/12362/12759/12389/13807/11796/227753/14528/22035/19401/13179/12575/17390/21814/12494 |
# Created ranked list for Par_vs_SM
ranked_Par_vs_SM <- create_ranked_list(results_Par_vs_SM)
# Run GSEA
gsea_Par_vs_SM <- GSEA(
ranked_Par_vs_SM,
TERM2GENE = hallmark_gene_sets,
pvalueCutoff = 0.05,
eps = 0, # Set eps to zero for better estimation
verbose = FALSE
)
# Extracted pathways with P-value == 1e-10 (extremely significant)
extremely_significant_Par_vs_SM <- gsea_Par_vs_SM@result %>%
filter(pvalue == 1e-10)
# Converted GSEA results to a data frame
gsea_results_df_Par_vs_SM <- as.data.frame(gsea_Par_vs_SM)
# Combined top 15 pathways with extremely significant ones
combined_top_Par_vs_SM <- gsea_results_df_Par_vs_SM %>%
arrange(p.adjust) %>%
head(15) %>%
bind_rows(extremely_significant_Par_vs_SM)
# Removed potential duplicates based on Description
combined_top_Par_vs_SM <- combined_top_Par_vs_SM %>%
distinct(Description, .keep_all = TRUE)
# Created a bar plot for Par_vs_SM
ggplot(combined_top_Par_vs_SM, aes(x = reorder(Description, NES), y = NES, fill = NES)) +
geom_bar(stat = "identity") +
coord_flip() +
scale_fill_gradient(low = "lightblue", high = "lightcoral") +
theme_minimal() +
labs(title = "Top Enriched Gene Sets - Par_vs_SM",
x = "Gene Set",
y = "Normalized Enrichment Score (NES)") +
theme(
plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.text = element_text(size = 12)
)
# Displayed the top 20 GSEA results in a scrollable table
kable(head(gsea_Par_vs_SM, 20), digits=50) %>% kable_styling("striped", full_width = FALSE, position = "center") %>% scroll_box(width = "100%", height = "500px")
| ID | Description | setSize | enrichmentScore | NES | pvalue | p.adjust | qvalue | rank | leading_edge | core_enrichment | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| HALLMARK_ALLOGRAFT_REJECTION | HALLMARK_ALLOGRAFT_REJECTION | HALLMARK_ALLOGRAFT_REJECTION | 172 | 0.7542856 | 2.211116 | 9.858377e-22 | 4.929189e-20 | 3.216944e-20 | 2263 | tags=62%, list=16%, signal=52% | 12526/16678/12500/12518/12525/12502/16428/15002/12501/12503/55985/12504/16818/16994/12481/50931/20299/84544/16186/22637/21939/12487/20304/66211/26411/20849/16364/16170/19264/17329/23871/22376/21838/12516/17076/15001/12519/19073/17395/16161/16408/56501/18646/20306/12703/17972/16185/12766/20296/20452/16643/16414/16190/12524/15163/18751/16149/14423/14960/50723/14991/12444/15894/16173/21926/18636/14998/16878/16174/226419/14102/16196/15170/79221/14999/14938/12010/14191/19171/14469/16184/16176/12051/216799/21354/21803/15976/12479/15900/13040/16822/17084/12263/17086/11423/80719/12189/56744/20846/12772/14744/14130/20085/15007/15979/21355 |
| HALLMARK_INTERFERON_GAMMA_RESPONSE | HALLMARK_INTERFERON_GAMMA_RESPONSE | HALLMARK_INTERFERON_GAMMA_RESPONSE | 187 | 0.6716847 | 1.978911 | 9.386127e-14 | 2.346532e-12 | 1.531421e-12 | 2654 | tags=55%, list=19%, signal=45% | 242248/71939/14990/110168/12984/12515/21939/16421/20304/20849/16364/17329/12265/76074/60533/75345/20306/12703/16185/22329/20296/108670/236573/20452/17472/16190/58203/246727/12363/12702/12524/209086/16149/14969/14960/21929/14991/15894/27056/626578/16912/14998/16154/12362/12494/14962/14102/16196/15170/16913/55932/12226/14938/15945/12010/56791/19171/12258/18712/209588/70082/12575/21354/22035/15976/20344/15959/27007/15900/434341/234311/246728/16822/547253/15958/21928/11861/20846/67138/667277/192786/69550/66892/65972/15007/14964/14190/56045/107607/67775/59027/229003/217069/19225/16362/57444/20821/54123/18035/327959/24110/19039 |
| HALLMARK_ANDROGEN_RESPONSE | HALLMARK_ANDROGEN_RESPONSE | HALLMARK_ANDROGEN_RESPONSE | 107 | -0.6667696 | -2.405666 | 4.014171e-12 | 6.690285e-11 | 4.366291e-11 | 787 | tags=24%, list=6%, signal=23% | 14229/30051/17132/328365/544963/74205/53416/21985/17761/13521/16612/21807/56847/20393/16613/16622/16617/16623/16615/18048/16619/13648/13646/16616/18050/16624 |
| HALLMARK_MYOGENESIS | HALLMARK_MYOGENESIS | HALLMARK_MYOGENESIS | 176 | 0.6268406 | 1.842645 | 5.253310e-09 | 6.566637e-08 | 4.285595e-08 | 2181 | tags=40%, list=16%, signal=34% | 17879/19293/11459/11474/21925/59011/21957/14077/17884/21953/17930/17901/17907/11537/20190/11937/76722/12862/21393/12715/15464/17189/12372/13346/11472/24131/20856/19309/20249/12491/56012/16985/14199/20657/14778/11489/17896/24052/17882/16511/13009/13808/17929/16404/12759/21955/17260/19245/56188/16000/12575/17873/12865/21803/12323/12554/23796/58226/16009/29817/11423/52250/18767/18563/69253/11568/12825/12017/14081/227753 |
| HALLMARK_INFLAMMATORY_RESPONSE | HALLMARK_INFLAMMATORY_RESPONSE | HALLMARK_INFLAMMATORY_RESPONSE | 171 | 0.6162798 | 1.805316 | 1.634422e-08 | 1.634422e-07 | 1.066675e-07 | 2323 | tags=46%, list=17%, signal=39% | 17167/12775/20343/16818/20299/16992/16197/12515/21939/56696/54483/20304/326623/269346/18413/17329/50778/80901/68713/233079/13136/20306/12506/21942/16185/20296/321019/16190/19217/19219/18793/50723/56221/15894/320207/20288/16173/21938/13732/16182/16878/16154/16174/54371/19734/12986/15945/11303/14676/54598/21950/19204/16176/12575/216799/22035/50498/14734/20339/23796/13058/15465/17387/19419/16822/16402/16175/54216/56212/20354/23845/17869/69550/16416/18414/67775/17096/59027 |
| HALLMARK_IL6_JAK_STAT3_SIGNALING | HALLMARK_IL6_JAK_STAT3_SIGNALING | HALLMARK_IL6_JAK_STAT3_SIGNALING | 80 | 0.6641959 | 1.825662 | 2.862486e-06 | 2.385405e-05 | 1.556791e-05 | 2261 | tags=46%, list=16%, signal=39% | 55985/16994/16186/12984/16199/14825/17329/12491/16401/16847/16161/16178/20306/12703/16190/12702/320207/21926/21938/16182/12494/14102/16196/12986/15945/16172/16184/18712/16176/21803/50498/16188/56744/20846/16416/15979/18414 |
| HALLMARK_COMPLEMENT | HALLMARK_COMPLEMENT | HALLMARK_COMPLEMENT | 179 | 0.5618742 | 1.651029 | 1.179749e-05 | 8.426777e-05 | 5.499581e-05 | 2810 | tags=40%, list=20%, signal=33% | 12902/17002/16818/14462/11472/20304/14825/12491/27226/22376/70574/13136/210293/17436/72461/14069/18793/12363/56193/21929/20255/12266/14696/12759/80906/14360/320207/14710/16912/12362/14962/14702/22762/14938/70789/12569/14268/21417/12258/18712/14067/13039/12554/16889/13040/17387/19419/16822/229445/13036/12263/14678/56212/667277/21789/18643/14571/17096/13032/13030/16362/14066/19141/54123/30955/380921/12262/16923/240672/12259/18783/13660 |
| HALLMARK_PROTEIN_SECRETION | HALLMARK_PROTEIN_SECRETION | HALLMARK_PROTEIN_SECRETION | 94 | -0.5236179 | -1.873083 | 1.544893e-05 | 9.655582e-05 | 6.301538e-05 | 3203 | tags=51%, list=23%, signal=40% | 20479/20333/56334/59042/11840/74006/17113/211673/20844/108123/213827/320634/22088/66251/70349/12068/59021/67074/67300/16004/11765/50797/12725/11977/54214/107338/69162/271457/16561/16668/69608/108664/56041/15893/20619/70361/107767/71943/18484/11928/99371/77929/20404/21985/110935/53331/14420/216350 |
| HALLMARK_E2F_TARGETS | HALLMARK_E2F_TARGETS | HALLMARK_E2F_TARGETS | 192 | 0.5475710 | 1.614163 | 4.564068e-05 | 2.535593e-04 | 1.654808e-04 | 4112 | tags=52%, list=29%, signal=37% | 76131/72391/11799/70099/66929/73804/20135/20877/12442/107995/17121/21973/18817/17345/70218/17215/17218/26934/12447/15366/13178/108961/218977/229841/110033/12580/12575/52276/67629/16906/14793/12534/16571/12531/17279/66570/19362/15201/54141/17219/18973/12236/20937/21853/97165/12189/17217/19139/67196/17869/98386/16765/69270/18969/22154/15361/17865/20878/21335/72107/114714/69639/21877/78833/20873/12021/17220/12704/60364/22059/56150/68219/53381/19183/17768/66131/17089/14056/16881/67134/18538/53605/19076/66403/19891/110052/12190/100710/19718/53892/100336/103573/16647/19384/69716/212377/50927/19355/18971 |
| HALLMARK_INTERFERON_ALPHA_RESPONSE | HALLMARK_INTERFERON_ALPHA_RESPONSE | HALLMARK_INTERFERON_ALPHA_RESPONSE | 89 | 0.6211967 | 1.723823 | 6.359629e-05 | 3.179814e-04 | 2.075247e-04 | 3617 | tags=61%, list=26%, signal=45% | 20343/68713/108670/16190/209086/16149/12799/16912/12362/16196/16913/55932/15945/12010/56791/14469/21354/15959/234311/246730/547253/15958/74153/67138/69550/65972/211329/15007/67775/229003/217069/16362/57444/20821/54123/24110/19039/67168/22169/74481/23960/19188/246696/319236/17069/66355/19124/66141/12370/17858/16363/16423/100038882/72075 |
| HALLMARK_TNFA_SIGNALING_VIA_NFKB | HALLMARK_TNFA_SIGNALING_VIA_NFKB | HALLMARK_TNFA_SIGNALING_VIA_NFKB | 183 | 0.5153072 | 1.514859 | 5.742110e-04 | 2.610050e-03 | 1.703401e-03 | 1681 | tags=31%, list=12%, signal=27% | 13537/16197/12515/20304/14825/55991/20621/13655/22029/12522/16598/20527/12519/18037/19288/170768/21942/20296/321019/19219/18793/19696/12702/56193/21929/50723/56708/15894/16173/21926/21923/13654/16878/17681/71712/56336/14282/227659/12044/12226/19698/12156/70789/15945/11303/21950/16176/14281/12575/12051/15939/14579/17873/21354/57783/106869 |
| HALLMARK_IL2_STAT5_SIGNALING | HALLMARK_IL2_STAT5_SIGNALING | HALLMARK_IL2_STAT5_SIGNALING | 182 | 0.5119814 | 1.506545 | 6.493635e-04 | 2.705681e-03 | 1.765813e-03 | 2255 | tags=37%, list=16%, signal=32% | 20343/15985/16994/54167/74734/16364/13011/384009/22029/12522/16407/20527/16178/22163/12506/12703/21942/16185/13808/16190/19217/13813/12524/14256/100604/12444/21938/16182/16878/11520/16154/12043/19228/11492/19734/67547/381319/20947/55932/12156/15945/18619/18671/12447/170743/21936/16184/53314/18712/14085/22781/17873/22035/20344/16188/18550/23834/15900/17988/64138/72729/11861/13601/17869/229521/14744/14190/15979 |
| HALLMARK_ADIPOGENESIS | HALLMARK_ADIPOGENESIS | HALLMARK_ADIPOGENESIS | 192 | 0.5014731 | 1.478273 | 1.496920e-03 | 5.757386e-03 | 3.757452e-03 | 2286 | tags=22%, list=16%, signal=19% | 12683/246747/11450/57264/11770/77037/16846/19016/12491/19218/13120/57875/14778/17436/170768/16956/18606/17001/16404/12266/11520/13830/11303/16890/23943/12580/14085/20618/16775/13358/170439/52538/11861/11816/67460/18405/18569/11933/15979/15926/66205/14571 |
| HALLMARK_G2M_CHECKPOINT | HALLMARK_G2M_CHECKPOINT | HALLMARK_G2M_CHECKPOINT | 186 | 0.4878541 | 1.439638 | 4.009933e-03 | 1.432119e-02 | 9.346461e-03 | 2886 | tags=38%, list=21%, signal=30% | 242705/12615/22137/72391/11799/70099/12235/73804/71819/20877/12442/16551/107995/26909/72119/21973/18817/18005/68612/17345/12428/17215/108907/17218/26934/15366/229841/110033/12580/67052/12051/21803/16906/240641/12534/16571/12531/23834/12449/108000/17219/18973/52033/78733/20937/67141/209737/17869/22036/98386/12545/16765/18969/16319/27214/20539/15361/17865/20878/21335/107503/17118/14211/20873/12021/56150/17536/233406/19650/20460 |
| HALLMARK_KRAS_SIGNALING_UP | HALLMARK_KRAS_SIGNALING_UP | HALLMARK_KRAS_SIGNALING_UP | 181 | 0.4858889 | 1.428489 | 5.523955e-03 | 1.841318e-02 | 1.201703e-02 | 2617 | tags=39%, list=19%, signal=32% | 12683/57264/12493/16186/16197/12767/22778/20568/26411/23871/22029/57875/94176/67468/16625/17395/93695/20440/16414/16792/18793/20394/71683/21929/20255/12444/21788/11501/21938/18613/16878/16154/14962/56743/19734/16913/58860/79221/11421/19699/12156/15945/18671/14396/16176/67888/14257/18703/15900/13040/73149/66066/16009/12142/20716/20616/11846/14063/14009/170744/74145/18933/19092/72309/11732/17385/19225/11796/20230/84094/83397 |
# Created ranked list for SL_vs_SM
ranked_SL_vs_SM <- create_ranked_list(results_SL_vs_SM)
# Run GSEA
gsea_SL_vs_SM <- GSEA(
ranked_SL_vs_SM,
TERM2GENE = hallmark_gene_sets,
pvalueCutoff = 0.05,
verbose = FALSE
)
#Skipping extremely significant step as there are very few enrichments in this group
#Plotting enrichment score for Hallmark Androgen Response
gseaplot2(gsea_SL_vs_SM, geneSetID = "HALLMARK_ANDROGEN_RESPONSE", title = "HALLMARK_ANDROGEN_RESPONSE")
END OF REPORT